Parameters
# Asset whose rows are selected by the query below.
assetName <- "Bitcoin"

# Number of random windows drawn by the bootstrap loop.
numBootstraps <- 5000

# Length of each sampled window, in minutes.
windowSizeMins <- 180

# Fixed seed so the random window draws are reproducible between runs.
set.seed(235987)
Feature definition
import(db)
import(util)
# Load the rows for `assetName` from `trn` together with a rolling feature
# computed in the database: the least-squares slope of `close` against time
# (epoch seconds) over the current row and the 10 rows preceding it, per asset.
#
# The epoch is cast to DOUBLE PRECISION, not REAL. A 4-byte float keeps only
# about 7 significant digits; for present-day epochs (~1.6e9 s) its
# representable values are 128 s apart, so neighbouring timestamps would
# collapse onto the same x value and distort the fitted slope.
#
# The window column is aliased explicitly because the code further down
# refers to it by the name `regr_slope`.
df <- dbGetQuery(
  db$getPool(),
  "
  SELECT
    ts
    , asset_name
    , target
    , close
    , regr_slope(close, extract(epoch FROM ts)::DOUBLE PRECISION)
        OVER (PARTITION BY asset_id ORDER BY ts ASC ROWS 10 PRECEDING)
        AS regr_slope
  FROM trn
  WHERE asset_name = $1
  ;",
  # Bound as $1 by the driver; never pasted into the SQL text.
  params = list(assetName)
)

# Convert in place to a data.table so the `df[i, j]` syntax below works.
setDT(df)
Select bootstrap samples and calculate correlations
# Result accumulators, all starting out empty (c() is NULL): the end
# timestamp of each sampled window and the three per-window correlations.
t_end <- corr_ts <- corr_close <- corr_regr_slope <- NULL

# Progress reporter sized to the number of bootstrap draws; util$pbar is a
# project helper (the echoed object below is a progress_bar).
pb <- util$pbar("Bootstrap sampling", numBootstraps)
# Tick with a length of zero before the loop starts.
pb$tick(0)
## <progress_bar>
## Public:
## clone: function (deep = FALSE)
## finished: FALSE
## initialize: function (format = "[:bar] :percent", total = 100, width = getOption("width") -
## message: function (msg, set_width = TRUE)
## terminate: function ()
## tick: function (len = 1, tokens = list())
## update: function (ratio, tokens = list())
## Private:
## callback: function (self)
## chars: list
## clear: FALSE
## clear_line: function (width)
## complete: FALSE
## current: 0
## cursor_to_start: function ()
## first: FALSE
## format: Bootstrap sampling [:bar] :current/:total (:percent) :el ...
## has_token: TRUE TRUE FALSE TRUE TRUE TRUE FALSE FALSE TRUE FALSE TRUE
## last_draw:
## message_class: NULL
## progress_message: function (..., domain = NULL, appendLF = TRUE)
## ratio: function ()
## render: function (tokens)
## show_after: 0
## spin: function ()
## start: 2021-11-27 23:54:53
## supported: FALSE
## total: 5000
## toupdate: TRUE
## width: 110
# Bootstrap: draw `numBootstraps` random windows of `windowSizeMins` minutes
# and record, for each window, the correlation of `target` with the
# timestamp, with the close price and with the rolling regression slope.
#
# Results go into preallocated vectors rather than being grown with append().
# Besides avoiding one copy per iteration, this keeps `t_end` in the same
# class as `ts`: appending onto an empty c() uses the default c() method,
# which strips a date-time class and leaves bare numbers.
if (nrow(df) == 0L) {
  stop("Bootstrap sampling needs at least one row in `df`", call. = FALSE)
}
t_end <- rep(df[, ts][NA_integer_], numBootstraps)
corr_ts <- rep(NA_real_, numBootstraps)
corr_close <- rep(NA_real_, numBootstraps)
corr_regr_slope <- rep(NA_real_, numBootstraps)

# seq_len() rather than 1:n so that numBootstraps = 0 runs zero iterations.
for (i in seq_len(numBootstraps)) {
  # The window is (rangeEnd - windowSizeMins, rangeEnd], ending at a randomly
  # chosen observed timestamp. rangeStart / rangeEnd are deliberately left as
  # top-level variables: the debug plot after the loop reuses the last window.
  rangeEnd <- sample(df[, ts], 1)
  rangeStart <- rangeEnd - as.difftime(windowSizeMins, units = "mins")
  dfp <- df[(ts > rangeStart) & (ts <= rangeEnd), ]

  t_end[i] <- rangeEnd

  # "na.or.complete" drops incomplete pairs exactly like "complete.obs", but
  # returns NA instead of raising an error when a window has no complete
  # pair, so one unlucky draw cannot abort the whole run.
  corr_ts[i] <- cor(
    as.numeric(dfp[, ts]), dfp[, target],
    use = "na.or.complete"
  )
  corr_close[i] <- cor(
    dfp[, close], dfp[, target],
    use = "na.or.complete"
  )
  corr_regr_slope[i] <- cor(
    dfp[, regr_slope], dfp[, target],
    use = "na.or.complete"
  )

  pb$tick()
}
Debug: plots of the data
# Debug view of the last window drawn by the bootstrap loop: close,
# regr_slope and target as stacked time series, each with its own y scale.
# The upper bound uses `<=` so the filter matches the loop's window
# (rangeStart, rangeEnd] and the row at rangeEnd itself is shown.
p1 <-
  df[(ts > rangeStart) & (ts <= rangeEnd)] |>
  melt(id.vars = "ts", measure.vars = c("close", "regr_slope", "target")) |>
  ggplot(aes(ts, value)) +
  geom_line() +
  facet_wrap(~variable, scales = "free", ncol = 1)
ggplotly(p1)
# 2-D binned counts of target against the rolling regression slope, using
# every row of df. Rows with non-finite values are dropped by the stat,
# which is what the warning echoed below reports.
p2 <- ggplot(data = df, mapping = aes(x = regr_slope, y = target)) +
  geom_bin_2d()
ggplotly(p2)
## Warning: Removed 305 rows containing non-finite values (stat_bin2d).
Correlation violins
# One row per bootstrap window: its end timestamp and the three correlations.
dfCorrs <- data.table(
  t_end = t_end,
  corr_ts = corr_ts,
  corr_close = corr_close,
  corr_regr_slope = corr_regr_slope
)

# Reshape to long form (one row per window and correlation type) and draw one
# violin per correlation type, with a reference line at zero correlation.
ggplot(
  melt(dfCorrs, measure.vars = c("corr_ts", "corr_close", "corr_regr_slope")),
  aes(x = variable, y = value)
) +
  geom_hline(yintercept = 0) +
  geom_violin()
